Reddit

Project Members:

  • Omar Alkhayyat (2121118117)
  • Ali Behbehani (2132131280)
  • Essa Alansari (2121117232)
In [1]:
import requests
import praw
import pandas as pd
import sys
import os
import numpy as np
from pandas import datetime
import altair as alt
from altair import *
In [2]:
reddit = praw.Reddit(client_id='UJ7cIcf2yoZSNg',
                    client_secret='SC1SH-Jc3dqwhjeplg8sWGqRS-g'
                    ,password='abdullah-10' ,
                    user_agent='kuwaituniversity' ,
                    username='omaralkhayyat')
Version 5.2.0 of praw is outdated. Version 5.3.0 was released Sunday December 17, 2017.
In [3]:
subriddet = reddit.subreddit('Popular')
In [4]:
authors = [x.author for x in subriddet.gilded()]
In [12]:
submission_dfs = []

for a in authors:
    # check if the csv file for the author name exists
    csv_file_name = "{}_submissions.csv".format(a.name)
    # the file exists, so lets load the csv as a dataframe
    if os.path.exists(csv_file_name):
        print("found {}, I will load the dataframe".format(csv_file_name))
        df = pd.read_csv(csv_file_name)
    else:
        print("did not find {}, loading from api".format(csv_file_name))
        
        #load submissions from reddit
        author_submissions = reddit.redditor(a.name).submissions

        # fetch the information for each submission of top 100 hot submissions
        sub_data = [
            {
                "title":x.title,
                "score":x.score,
                "num_comments":x.num_comments,
                "selftext":x.selftext,
                "ID":x.id,
                "Author":x.author.name,
                "Submission_Created":x.created_utc
            } for x in author_submissions.hot(limit=100)
        ]
        
    
        # create dataframe for the submissions of that author
        df = pd.DataFrame(sub_data)
                
        # store it, so we dont fetch it next time
        df.to_csv(csv_file_name)
    
    # add the author submission df to submission_dfs
    submission_dfs.append(df)

# loop ended, now combine all the dfs into a single df
subs_df = pd.concat(submission_dfs)
did not find jorshrod_submissions.csv, loading from api
did not find attorneyriffic_submissions.csv, loading from api
did not find MasterBac_submissions.csv, loading from api
did not find AstralWave_submissions.csv, loading from api
did not find Chinguentes_submissions.csv, loading from api
did not find gazlegeoff_submissions.csv, loading from api
did not find outer_sect_disciple_submissions.csv, loading from api
did not find Cassian_Andor_submissions.csv, loading from api
did not find boglehead28_submissions.csv, loading from api
did not find GuitarCFD_submissions.csv, loading from api
did not find Max_Trollbot__submissions.csv, loading from api
did not find bwburke94_submissions.csv, loading from api
did not find PM_Me_YourFav_Song_submissions.csv, loading from api
did not find mrmattolsen_submissions.csv, loading from api
did not find flakingnapstich_submissions.csv, loading from api
did not find DoctorWhosOnFirst_submissions.csv, loading from api
did not find Paulingtons_submissions.csv, loading from api
did not find emaG_ehT_submissions.csv, loading from api
did not find Wiknetti_submissions.csv, loading from api
did not find diggdeserter_submissions.csv, loading from api
did not find shy_aries_submissions.csv, loading from api
did not find Rekdon_submissions.csv, loading from api
did not find Alastair789_submissions.csv, loading from api
did not find mickeyquicknumbers_submissions.csv, loading from api
did not find CineFunk_submissions.csv, loading from api
did not find bakonydraco_submissions.csv, loading from api
did not find mcbain_101_submissions.csv, loading from api
did not find VideoGameJeff_submissions.csv, loading from api
did not find sodapop66_submissions.csv, loading from api
did not find dibbiluncan_submissions.csv, loading from api
did not find MarbledNightmare_submissions.csv, loading from api
did not find toobs623_submissions.csv, loading from api
did not find RamsesThePigeon_submissions.csv, loading from api
did not find thecricketnerd_submissions.csv, loading from api
did not find mcpat21_submissions.csv, loading from api
did not find TeckFire_submissions.csv, loading from api
did not find likta_submissions.csv, loading from api
did not find DrJWilson_submissions.csv, loading from api
did not find JonathanJK_submissions.csv, loading from api
did not find dweezil22_submissions.csv, loading from api
did not find Ihavesomethingtosay9_submissions.csv, loading from api
did not find JustSomeBadAdvice_submissions.csv, loading from api
did not find zazzlekdazzle_submissions.csv, loading from api
did not find -Travis_submissions.csv, loading from api
did not find Mongolian_Hamster_submissions.csv, loading from api
did not find GeneralissimoFranco_submissions.csv, loading from api
did not find mettmerizing_submissions.csv, loading from api
did not find kaivman_submissions.csv, loading from api
did not find KingTyr_submissions.csv, loading from api
did not find Bastiat_submissions.csv, loading from api
did not find LesPantalonsFancie_submissions.csv, loading from api
did not find MaceWumpus_submissions.csv, loading from api
did not find Egon_1_submissions.csv, loading from api
did not find Unbelievabob_submissions.csv, loading from api
did not find lmDefiniteIyLying_submissions.csv, loading from api
did not find suchgarbage_submissions.csv, loading from api
did not find GeekyMeerkat_submissions.csv, loading from api
did not find JoeReMi_submissions.csv, loading from api
did not find BlargyMcBlargFace_submissions.csv, loading from api
did not find brentg454_submissions.csv, loading from api
did not find strained_brain_submissions.csv, loading from api
did not find captainmagictrousers_submissions.csv, loading from api
did not find lun471k_submissions.csv, loading from api
did not find budgravy_submissions.csv, loading from api
did not find left_accelerationist_submissions.csv, loading from api
did not find neoliberal_agenda_submissions.csv, loading from api
did not find solar_compost_submissions.csv, loading from api
did not find cjet79_submissions.csv, loading from api
did not find qualityandaudit_submissions.csv, loading from api
did not find tallmon_submissions.csv, loading from api
did not find dust_wind_submissions.csv, loading from api
did not find KVMechelen_submissions.csv, loading from api
did not find FarawayFairways_submissions.csv, loading from api
did not find CabBeats_submissions.csv, loading from api
did not find Domri_Rade_submissions.csv, loading from api
did not find BulTV_submissions.csv, loading from api
did not find Woofers_MacBarkFloof_submissions.csv, loading from api
did not find headoverheals_submissions.csv, loading from api
did not find JuliettPapaRomeo_submissions.csv, loading from api
did not find amheekin_submissions.csv, loading from api
did not find DarkSpace-Harbinger_submissions.csv, loading from api
did not find _UpstateNYer__submissions.csv, loading from api
did not find IAm_TulipFace_submissions.csv, loading from api
did not find ScaryKerry91476_submissions.csv, loading from api
did not find Goingsnakesilver_submissions.csv, loading from api
did not find HardlightCereal_submissions.csv, loading from api
did not find sgthombre_submissions.csv, loading from api
did not find ManWithABigDick_submissions.csv, loading from api
did not find kaneki-shinobu_submissions.csv, loading from api
did not find Unofficial_Player_submissions.csv, loading from api
did not find LookAnts_submissions.csv, loading from api
did not find dead137_submissions.csv, loading from api
did not find mthilliard_submissions.csv, loading from api
did not find amusicalgirl93_submissions.csv, loading from api
did not find ALLSTARTRIPOD_submissions.csv, loading from api
did not find BigHouseMaiden_submissions.csv, loading from api
did not find Tman7182_submissions.csv, loading from api
did not find miketwo345_submissions.csv, loading from api
did not find Timbitkiller_submissions.csv, loading from api
did not find bilde2910_submissions.csv, loading from api
In [13]:
subs_df.head()
Out[13]:
Author ID Submission_Created num_comments score selftext title
0 jorshrod 7l56sb 1.513811e+09 362 4804 I was having an EMG test today and started tal... How much bandwidth does the spinal cord have?
1 jorshrod 7l4b8z 1.513803e+09 11 1 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Director's Cut, Furi, Worms W...
2 jorshrod 7l48mr 1.513802e+09 2 1 [FIRST PAGE](https://www.reddit.com/r/IGSRep/c... Jorshrod's IGS Rep Page 2
3 jorshrod 7kmbct 1.513615e+09 8 1 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Directors Cut, BlazBlue: Chro...
4 jorshrod 7j47a0 1.513018e+09 4 1 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H]Wasteland 2:Director's, Furi, H1Z1, Offworl...
In [14]:
subs_df.Submission_Created = subs_df.Submission_Created.apply(datetime.fromtimestamp)
In [27]:
subs_df.to_csv("subs_df.csv")
In [17]:
cd_data = [
   {
       "name":x.name,
       "comment_karma":x.comment_karma,
       "post_karma":x.link_karma,
       "is_gold":x.is_gold,
       "is_mod":x.is_mod,
       "ID":x.id,
       "is employee":x.is_employee,
       "is_friend":x.is_friend,
       
       
   }
       
   for x in authors]
In [18]:
cdf = pd.DataFrame(cd_data)
In [20]:
cdf.head()
Out[20]:
ID comment_karma is employee is_friend is_gold is_mod name post_karma
0 4amx5 11338 False False True False jorshrod 9785
1 60siq 33855 False False True False attorneyriffic 903
2 mnal5 294 False False True False MasterBac 3699
3 11absx 891 False False True False AstralWave 10858
4 6k069 254 False False True False Chinguentes 8905
In [21]:
cdf.to_csv("cdf.csv")
In [26]:
cdf
Out[26]:
Created ID comment_karma is employee is_friend is_gold is_mod name post_karma
0 2011-04-12 05:28:43 53a1t 51482 False False True False Strid3r21 55980
1 2013-09-01 08:23:27 cyv1d 34593 False False True False Phineasfogg 34
2 2016-12-02 15:47:04 1397sf 10260 False False True False SoWhatDidIMiss 1114
3 2015-05-10 15:45:38 ne42w 32449 False False True True Baz-Ravish 8148
4 2008-02-13 18:59:48 3370f 20585 False False True False papercup 13
5 2013-07-06 00:35:06 c9xtv 5461 False False True False runedrune 2685
6 2016-07-16 09:10:54 zjlzm 1825 False False True False theboredlockpicker 3219
7 2017-07-11 01:44:50 6ot8gst 1788 False False True True antonfabijan 10074
8 2016-07-14 21:08:03 zi9rv 9126 False False True False JamieBGood1 501
9 2012-03-15 17:42:23 76fd9 59351 False False True True Pyrolytic 10678
10 2011-05-19 03:06:14 596ov 97299 False False True False foreveracubone 11898
11 2011-05-07 23:47:56 57d8x 2624750 False False True True _vargas_ 119749
12 2011-01-09 15:22:40 4pfib 27241 False False True True Deezl-Vegas 1525
13 2011-11-30 14:21:53 6cc9f 101345 False False True True amgov 10001
14 2011-04-10 11:54:08 5300k 26514 False False True True NoMoreBoozePlease 1583
15 2013-09-09 15:12:01 d2gxt 261 False False True False yo0han 296
16 2016-12-07 19:18:52 13e0bc 4023 False False True False BearKilgore 21747
17 2016-06-26 04:57:47 z0n23 3210 False False True False blorfie 6238
18 2015-11-01 02:02:36 rmpx2 1 False False True False Eigengraulogy 2926
19 2013-01-09 04:20:55 a5kq5 50804 False False True False GreatNorthWeb 378
20 2012-09-14 07:59:13 906f4 2130 False False True False Vicariousness 2563
21 2017-01-14 15:45:20 14fg9m 6323 False False True False muddy651 9
22 2015-08-01 00:46:51 p7xgy 398 False False True True VenusSparkle 14399
23 2016-12-24 21:56:20 13tvxl 1202 False False True False dojobrady 5
24 2014-05-27 17:07:26 gqh2m 3208 False False True False neogeo5185 118
25 2017-11-05 15:08:46 jvmpjv4 3349 False False True False SixStringerSoldier 664
26 2017-02-08 07:53:35 156sz4 81994 False False True True BerryBrickle 26007
27 2014-05-18 19:43:57 gm8hr 55252 False False True False contentBat 1805
28 2016-05-31 21:40:33 ycpwn 3117 False False True False alanrickmanisdead 1
29 2014-12-24 22:33:04 kat4q 15309 False False True True necajitaaa 4049
... ... ... ... ... ... ... ... ... ...
70 2012-06-07 01:29:24 7x409 14692 False False True False Mande1baum 244
71 2017-12-17 22:57:38 onrvuue 36 False False True False eb991 193
72 2016-07-23 14:41:17 zqv5c 7348 False False True True mokujinx 11497
73 2008-05-23 00:42:55 35h0a 53777 False False True True catsfive 23420
74 2015-04-05 04:37:19 mpi3b 1958 False False True False TrontheTechie 29
75 2017-06-18 01:33:32 4dok57o 35 False False True False oknicegoodyes 1
76 2015-03-12 14:02:54 m3f53 1427 False False True False Mikeinthehouse 17
77 2016-01-27 01:38:22 u551h 8640 False False True False BlindTiger86 2579
78 2010-03-22 02:32:03 3yb46 16351 False False True False RZephyr07 2184
79 2017-05-14 00:50:07 11fo9a3 2927 False False True False ijustwantspaghetti 85
80 2012-08-10 00:22:52 8m8sc 133102 False False True True dolphinesque 586
81 2015-12-23 22:51:38 t6qb3 3245 False False True True MaitieS 8209
82 2016-09-18 18:05:31 11hu62 14058 False False True False Aesen1 8505
83 2016-10-19 19:57:13 128njv 146857 False False True False SuicideBonger 14395
84 2017-05-26 14:38:28 27i1nkd 6391 False False True False hates_stupid_people 1
85 2015-12-27 22:24:11 t9rl2 11989 False False True True thedrizzle126 2459
86 2015-01-04 09:24:52 khwq5 4849 False False True False alchemy3083 1
87 2017-10-24 04:40:50 if5mggm 1227 False False True False manoftwoway 106
88 2012-11-05 13:06:13 9ibo8 50768 False False True True JackXDark 4335
89 2012-09-29 09:16:26 95qob 255 False False True False Paralysed 210
90 2017-10-26 13:08:12 iow88om 1804 False False True False HighOnEth 1
91 2013-08-30 09:49:25 cy3j5 6114 False False True False benfreilich 74
92 2013-03-24 07:36:21 b1zre 5915 False False True False haloumi_pants 769
93 2017-10-16 12:13:34 6i2man 1319 False False True False canContinue 83
94 2015-08-08 12:44:45 peaym 513 False False True False DadAsFuck 177
95 2010-10-23 02:01:29 4gc95 52559 False False True False seemonkey 5608
96 2012-07-24 19:36:12 8fqg9 217174 False False True True tommy2014015 68236
97 2014-02-25 20:00:38 fg1hg 910 False False True True mikerich15 2236
98 2013-10-19 23:34:58 dkora 44780 False False True False BigBobbert 21132
99 2014-07-27 01:42:27 hl4s0 15828 False False True False Weasel2113 6417

100 rows × 9 columns

In [22]:
final_df = cdf.merge(subs_df, how="outer", left_on="name", right_on="Author")
In [23]:
final_df
Out[23]:
ID_x comment_karma is employee is_friend is_gold is_mod name post_karma Author ID_y Submission_Created num_comments score selftext title
0 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l56sb 2017-12-21 02:01:29 362.0 4804.0 I was having an EMG test today and started tal... How much bandwidth does the spinal cord have?
1 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l4b8z 2017-12-20 23:51:33 11.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Director's Cut, Furi, Worms W...
2 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l48mr 2017-12-20 23:41:24 2.0 1.0 [FIRST PAGE](https://www.reddit.com/r/IGSRep/c... Jorshrod's IGS Rep Page 2
3 4amx5 11338 False False True False jorshrod 9785 jorshrod 7kmbct 2017-12-18 19:36:24 8.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Directors Cut, BlazBlue: Chro...
4 4amx5 11338 False False True False jorshrod 9785 jorshrod 7j47a0 2017-12-11 21:38:34 4.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H]Wasteland 2:Director's, Furi, H1Z1, Offworl...
5 4amx5 11338 False False True False jorshrod 9785 jorshrod 7i9nqu 2017-12-08 00:14:56 0.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Directors Cut, Furi, Sexy Bru...
6 4amx5 11338 False False True False jorshrod 9785 jorshrod 7i8pny 2017-12-07 21:54:22 1.0 0.0 [removed] When does the season review come out? Did I mi...
7 4amx5 11338 False False True False jorshrod 9785 jorshrod 7hw45q 2017-12-06 08:58:53 3.0 29.0 I've been reflecting this week on the differen... 34 weeks gone for #3, still doesn't feel real!
8 4amx5 11338 False False True False jorshrod 9785 jorshrod 7hvmn2 2017-12-06 07:23:00 13.0 0.0 I'm wanting to buy the Revan Reborn set, but o... Best way to turn Cartel coins into Credits?
9 4amx5 11338 False False True False jorshrod 9785 jorshrod 7htsok 2017-12-06 02:27:33 12.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Director's Cut, Furi, H1Z1, O...
10 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gx6d7 2017-12-01 22:00:08 4.0 3.0 I have a Ubiquity Edgerouter Lite as my gatewa... Configuring VPN at router? Options?
11 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwryx 2017-12-01 21:03:51 3.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Entire Dec Bundle [W] Entire Jan bundle or...
12 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwbws 2017-12-01 20:01:38 2.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] H1Z1, Furi, Dec Bundle [W] Games from my l...
13 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwr75 2017-12-01 21:00:46 1.0 0.0 December Humble Monthly Games Revealed, includ...
14 4amx5 11338 False False True False jorshrod 9785 jorshrod 7g28ym 2017-11-28 09:44:15 6.0 2.0 Re-ascending this week with a new 21:9 monitor...
15 4amx5 11338 False False True False jorshrod 9785 jorshrod 7fqqrn 2017-11-27 02:29:07 1.0 1.0 I'm getting a new LG 34" ultrawide monitor thi... Recommended Monitor Backlight Solutions
16 4amx5 11338 False False True False jorshrod 9785 jorshrod 7bwgkm 2017-11-10 00:25:05 6.0 2.0 I've finished building my UCS Falcon, a set I'... Ideas for displaying the UCS Falcon?
17 4amx5 11338 False False True False jorshrod 9785 jorshrod 7a9ggs 2017-11-02 08:12:36 6.0 4.0 I last played in 2015, the first 9 chapters of... Returning Player Questions (I'm sorry)
18 4amx5 11338 False False True False jorshrod 9785 jorshrod 75jkfh 2017-10-10 22:39:26 11.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H]Rise of the Tomb Raider, Furi, Oct Bundle, ...
19 4amx5 11338 False False True False jorshrod 9785 jorshrod 74oyng 2017-10-06 20:01:03 223.0 430.0 October Humble Monthly Games Revealed! Novembe...
20 4amx5 11338 False False True False jorshrod 9785 jorshrod 74oz6p 2017-10-06 20:03:06 63.0 75.0 October Humble Monthly Games Revealed! Novembe...
21 4amx5 11338 False False True False jorshrod 9785 jorshrod 74p9i8 2017-10-06 20:45:19 21.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Rise of the Tomb Raider, Furi, Orwell, Mos...
22 4amx5 11338 False False True False jorshrod 9785 jorshrod 73jct7 2017-10-01 06:05:53 9.0 75.0 My Lego Saturn V displayed in my home office, ...
23 4amx5 11338 False False True False jorshrod 9785 jorshrod 71ousf 2017-09-22 07:57:42 5.0 4.0 I saw the trial dropped and installed this mor... TIFU by leaving my FIFA 18 launcher running an...
24 4amx5 11338 False False True False jorshrod 9785 jorshrod 71moia 2017-09-22 01:15:00 8.0 6.0 In my teen and college years I read almost all... Getting back into Star Wars books, looking for...
25 4amx5 11338 False False True False jorshrod 9785 jorshrod 71md63 2017-09-22 00:25:28 11.0 2.0 [removed] Current Star Wars Cannon Books
26 4amx5 11338 False False True False jorshrod 9785 jorshrod 700f9q 2017-09-14 09:03:16 7.0 9.0 I have been lightly paying attention to the LI... What is the likelihood of the LIGO Gravity Wav...
27 4amx5 11338 False False True False jorshrod 9785 jorshrod 6zsgt4 2017-09-13 07:51:04 2.0 1.0 I played a lot of Enemy Unknown and Enemy With... Help with CAmpaign Strategy
28 4amx5 11338 False False True False jorshrod 9785 jorshrod 6ynj4z 2017-09-07 17:26:58 17.0 1.0 [IGS Rep](https://www.reddit.com/r/IGSRep/comm... [H] Eterium, Killing Floor 2, Stories Untold, ...
29 4amx5 11338 False False True False jorshrod 9785 jorshrod 6ydwzh 2017-09-06 09:21:28 28.0 3.0 Wife and I are coming to an impasse on naming ... Leftover and discarded names request (female)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5207 bbuks 44880 False False True True bilde2910 118898 bilde2910 51xzfn 2016-09-09 18:31:03 2.0 18.0 I'm having several peers in my peer list showi... What is the T flag?
5208 bbuks 44880 False False True True bilde2910 118898 bilde2910 505r9d 2016-08-29 19:09:15 5.0 120.0 Chicago ✈ Stockholm on SAS; 37,000 ft in the air
5209 bbuks 44880 False False True True bilde2910 118898 bilde2910 4z23gf 2016-08-22 21:49:41 2.0 172.0 In 500 meters, leave the highway, then re-ente...
5210 bbuks 44880 False False True True bilde2910 118898 bilde2910 4xkxvg 2016-08-13 23:50:24 9.0 3.0 When you walk on a pier that is not on the map
5211 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ws7uz 2016-08-08 23:12:26 0.0 0.0 me irl
5212 bbuks 44880 False False True True bilde2910 118898 bilde2910 4w03yz 2016-08-03 21:46:39 6.0 9.0 Has anyone else gotten this error message befo...
5213 bbuks 44880 False False True True bilde2910 118898 bilde2910 4to1ke 2016-07-20 02:01:31 0.0 1.0 Happens with 100% consistency. I have no probl... "Submit changes" button when editing comments ...
5214 bbuks 44880 False False True True bilde2910 118898 bilde2910 4smcm1 2016-07-13 12:49:17 2.0 13.0 An abandoned WWII bunker in France | by Anita ...
5215 bbuks 44880 False False True True bilde2910 118898 bilde2910 4smpeb 2016-07-13 14:39:10 1.0 5.0 xkcd on Pokémon Go
5216 bbuks 44880 False False True True bilde2910 118898 bilde2910 4s575r 2016-07-10 14:52:25 0.0 2.0 It kind of looks like a toad, and won't respon... Can someone help me catch this pokémon?
5217 bbuks 44880 False False True True bilde2910 118898 bilde2910 4s2dg9 2016-07-10 00:31:07 0.0 5.0 Availo EsperNet states in their MotD the follo... Availo EsperNet's really old theme songs
5218 bbuks 44880 False False True True bilde2910 118898 bilde2910 4repbj 2016-07-05 23:25:33 12.0 65.0 In the world of Windows, cancelling a file tra...
5219 bbuks 44880 False False True True bilde2910 118898 bilde2910 4pgrrr 2016-06-23 16:37:45 0.0 2.0 Groupees currently has an offer on trailer mus...
5220 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ocaeg 2016-06-16 11:12:11 8.0 4.0 The YAMPST statistics tracking system for modp...
5221 bbuks 44880 False False True True bilde2910 118898 bilde2910 4o1d1x 2016-06-14 16:23:34 9.0 2.0 Everyone of your profession decides to do the ...
5222 bbuks 44880 False False True True bilde2910 118898 bilde2910 4nh8hs 2016-06-10 19:52:47 7.0 49.0 Comic Sans is the best CAPTCHA
5223 bbuks 44880 False False True True bilde2910 118898 bilde2910 4n58tp 2016-06-08 17:19:44 9.0 71.0 How to let everyone know that your site is vul...
5224 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lhnyr 2016-05-29 00:36:48 211.0 657.0 Hi. I know that this post will be quite a wall... I'm worried about current trends in the Minecr...
5225 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lhov5 2016-05-29 00:43:10 93.0 159.0 I'm worried about current trends in the Minecr...
5226 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lbt77 2016-05-27 20:24:28 1.0 0.0 "Reward: 2 Emergencies"
5227 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kzde5 2016-05-25 15:25:03 560.0 1803.0 h3h3productions [has been sued](https://www.yo... h3h3productions sued for copyright infringemen...
5228 bbuks 44880 False False True True bilde2910 118898 bilde2910 4l1ycv 2016-05-26 00:23:28 0.0 0.0 I experimented with timelapses a while ago. I ...
5229 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kmwcs 2016-05-23 10:50:04 87.0 1148.0 I'm just casually browsing the web using Inter...
5230 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kd0na 2016-05-21 13:19:48 1.0 7.0 A Minecraft server instance launched on Twitch...
5231 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ji5rm 2016-05-16 00:47:32 0.0 5.0 Trailer driving through frozen wastelands
5232 bbuks 44880 False False True True bilde2910 118898 bilde2910 4jhmw1 2016-05-15 22:46:36 0.0 1.0 [removed] LPT: If there are a handful of special charact...
5233 bbuks 44880 False False True True bilde2910 118898 bilde2910 4jb2zh 2016-05-14 14:38:08 1.0 1.0 [removed] This is not an AMA.
5234 bbuks 44880 False False True True bilde2910 118898 bilde2910 4j2ee4 2016-05-12 22:29:40 1.0 21.0 The Norwegian Polar Institute has a database o...
5235 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ibtml 2016-05-08 00:29:22 40.0 120.0 [removed] I made a thing that lets you share Steam key g...
5236 bbuks 44880 False False True True bilde2910 118898 bilde2910 4iaxow 2016-05-07 20:44:52 7.0 89.0 [Groupees] The Fantasy Bundle #2 | Mystery of ...

5237 rows × 15 columns

In [44]:
final_df.to_csv("final_df.csv")
In [46]:
fdf = pd.read_csv("final_df.csv")
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens (pandas\_libs\parsers.c:14858)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype (pandas\_libs\parsers.c:17119)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._string_convert (pandas\_libs\parsers.c:17347)()

pandas\_libs\parsers.pyx in pandas._libs.parsers._string_box_utf8 (pandas\_libs\parsers.c:23041)()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe0 in position 8: invalid continuation byte

During handling of the above exception, another exception occurred:

UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-46-ee8926dcf4eb> in <module>()
----> 1 fdf = pd.read_csv("final_df.csv")

C:\Users\omar\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654 
--> 655         return _read(filepath_or_buffer, kwds)
    656 
    657     parser_f.__name__ = name

C:\Users\omar\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    409 
    410     try:
--> 411         data = parser.read(nrows)
    412     finally:
    413         parser.close()

C:\Users\omar\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
    980                 raise ValueError('skipfooter not supported for iteration')
    981 
--> 982         ret = self._engine.read(nrows)
    983 
    984         if self.options.get('as_recarray'):

C:\Users\omar\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   1717     def read(self, nrows=None):
   1718         try:
-> 1719             data = self._reader.read(nrows)
   1720         except StopIteration:
   1721             if self._first_chunk:

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.read (pandas\_libs\parsers.c:10862)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory (pandas\_libs\parsers.c:11138)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_rows (pandas\_libs\parsers.c:12175)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data (pandas\_libs\parsers.c:14136)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens (pandas\_libs\parsers.c:14972)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype (pandas\_libs\parsers.c:17119)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._string_convert (pandas\_libs\parsers.c:17347)()

pandas\_libs\parsers.pyx in pandas._libs.parsers._string_box_utf8 (pandas\_libs\parsers.c:23041)()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe0 in position 8: invalid continuation byte
In [4]:
fdf
Out[4]:
Unnamed: 0 ID_x comment_karma is employee is_friend is_gold is_mod name post_karma Author ID_y Submission_Created num_comments score selftext title
0 0 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l56sb 2017-12-21 02:01:29 362.0 4804.0 I was having an EMG test today and started tal... How much bandwidth does the spinal cord have?
1 1 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l4b8z 2017-12-20 23:51:33 11.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Director's Cut, Furi, Worms W...
2 2 4amx5 11338 False False True False jorshrod 9785 jorshrod 7l48mr 2017-12-20 23:41:24 2.0 1.0 [FIRST PAGE](https://www.reddit.com/r/IGSRep/c... Jorshrod's IGS Rep Page 2
3 3 4amx5 11338 False False True False jorshrod 9785 jorshrod 7kmbct 2017-12-18 19:36:24 8.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Directors Cut, BlazBlue: Chro...
4 4 4amx5 11338 False False True False jorshrod 9785 jorshrod 7j47a0 2017-12-11 21:38:34 4.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H]Wasteland 2:Director's, Furi, H1Z1, Offworl...
5 5 4amx5 11338 False False True False jorshrod 9785 jorshrod 7i9nqu 2017-12-08 00:14:56 0.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Directors Cut, Furi, Sexy Bru...
6 6 4amx5 11338 False False True False jorshrod 9785 jorshrod 7i8pny 2017-12-07 21:54:22 1.0 0.0 [removed] When does the season review come out? Did I mi...
7 7 4amx5 11338 False False True False jorshrod 9785 jorshrod 7hw45q 2017-12-06 08:58:53 3.0 29.0 I've been reflecting this week on the differen... 34 weeks gone for #3, still doesn't feel real!
8 8 4amx5 11338 False False True False jorshrod 9785 jorshrod 7hvmn2 2017-12-06 07:23:00 13.0 0.0 I'm wanting to buy the Revan Reborn set, but o... Best way to turn Cartel coins into Credits?
9 9 4amx5 11338 False False True False jorshrod 9785 jorshrod 7htsok 2017-12-06 02:27:33 12.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Wasteland 2: Director's Cut, Furi, H1Z1, O...
10 10 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gx6d7 2017-12-01 22:00:08 4.0 3.0 I have a Ubiquity Edgerouter Lite as my gatewa... Configuring VPN at router? Options?
11 11 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwryx 2017-12-01 21:03:51 3.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Entire Dec Bundle [W] Entire Jan bundle or...
12 12 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwbws 2017-12-01 20:01:38 2.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] H1Z1, Furi, Dec Bundle [W] Games from my l...
13 13 4amx5 11338 False False True False jorshrod 9785 jorshrod 7gwr75 2017-12-01 21:00:46 1.0 0.0 NaN December Humble Monthly Games Revealed, includ...
14 14 4amx5 11338 False False True False jorshrod 9785 jorshrod 7g28ym 2017-11-28 09:44:15 6.0 2.0 NaN Re-ascending this week with a new 21:9 monitor...
15 15 4amx5 11338 False False True False jorshrod 9785 jorshrod 7fqqrn 2017-11-27 02:29:07 1.0 1.0 I'm getting a new LG 34" ultrawide monitor thi... Recommended Monitor Backlight Solutions
16 16 4amx5 11338 False False True False jorshrod 9785 jorshrod 7bwgkm 2017-11-10 00:25:05 6.0 2.0 I've finished building my UCS Falcon, a set I'... Ideas for displaying the UCS Falcon?
17 17 4amx5 11338 False False True False jorshrod 9785 jorshrod 7a9ggs 2017-11-02 08:12:36 6.0 4.0 I last played in 2015, the first 9 chapters of... Returning Player Questions (I'm sorry)
18 18 4amx5 11338 False False True False jorshrod 9785 jorshrod 75jkfh 2017-10-10 22:39:26 11.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H]Rise of the Tomb Raider, Furi, Oct Bundle, ...
19 19 4amx5 11338 False False True False jorshrod 9785 jorshrod 74oyng 2017-10-06 20:01:03 223.0 430.0 NaN October Humble Monthly Games Revealed! Novembe...
20 20 4amx5 11338 False False True False jorshrod 9785 jorshrod 74oz6p 2017-10-06 20:03:06 63.0 75.0 NaN October Humble Monthly Games Revealed! Novembe...
21 21 4amx5 11338 False False True False jorshrod 9785 jorshrod 74p9i8 2017-10-06 20:45:19 21.0 1.0 [My IGS REP](https://www.reddit.com/r/IGSRep/c... [H] Rise of the Tomb Raider, Furi, Orwell, Mos...
22 22 4amx5 11338 False False True False jorshrod 9785 jorshrod 73jct7 2017-10-01 06:05:53 9.0 75.0 NaN My Lego Saturn V displayed in my home office, ...
23 23 4amx5 11338 False False True False jorshrod 9785 jorshrod 71ousf 2017-09-22 07:57:42 5.0 4.0 I saw the trial dropped and installed this mor... TIFU by leaving my FIFA 18 launcher running an...
24 24 4amx5 11338 False False True False jorshrod 9785 jorshrod 71moia 2017-09-22 01:15:00 8.0 6.0 In my teen and college years I read almost all... Getting back into Star Wars books, looking for...
25 25 4amx5 11338 False False True False jorshrod 9785 jorshrod 71md63 2017-09-22 00:25:28 11.0 2.0 [removed] Current Star Wars Cannon Books
26 26 4amx5 11338 False False True False jorshrod 9785 jorshrod 700f9q 2017-09-14 09:03:16 7.0 9.0 I have been lightly paying attention to the LI... What is the likelihood of the LIGO Gravity Wav...
27 27 4amx5 11338 False False True False jorshrod 9785 jorshrod 6zsgt4 2017-09-13 07:51:04 2.0 1.0 I played a lot of Enemy Unknown and Enemy With... Help with CAmpaign Strategy
28 28 4amx5 11338 False False True False jorshrod 9785 jorshrod 6ynj4z 2017-09-07 17:26:58 17.0 1.0 [IGS Rep](https://www.reddit.com/r/IGSRep/comm... [H] Eterium, Killing Floor 2, Stories Untold, ...
29 29 4amx5 11338 False False True False jorshrod 9785 jorshrod 6ydwzh 2017-09-06 09:21:28 28.0 3.0 Wife and I are coming to an impasse on naming ... Leftover and discarded names request (female)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5207 5207 bbuks 44880 False False True True bilde2910 118898 bilde2910 51xzfn 2016-09-09 18:31:03 2.0 18.0 I'm having several peers in my peer list showi... What is the T flag?
5208 5208 bbuks 44880 False False True True bilde2910 118898 bilde2910 505r9d 2016-08-29 19:09:15 5.0 120.0 NaN Chicago ✈ Stockholm on SAS; 37,000 ft in the air
5209 5209 bbuks 44880 False False True True bilde2910 118898 bilde2910 4z23gf 2016-08-22 21:49:41 2.0 172.0 NaN In 500 meters, leave the highway, then re-ente...
5210 5210 bbuks 44880 False False True True bilde2910 118898 bilde2910 4xkxvg 2016-08-13 23:50:24 9.0 3.0 NaN When you walk on a pier that is not on the map
5211 5211 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ws7uz 2016-08-08 23:12:26 0.0 0.0 NaN me irl
5212 5212 bbuks 44880 False False True True bilde2910 118898 bilde2910 4w03yz 2016-08-03 21:46:39 6.0 9.0 NaN Has anyone else gotten this error message befo...
5213 5213 bbuks 44880 False False True True bilde2910 118898 bilde2910 4to1ke 2016-07-20 02:01:31 0.0 1.0 Happens with 100% consistency. I have no probl... "Submit changes" button when editing comments ...
5214 5214 bbuks 44880 False False True True bilde2910 118898 bilde2910 4smcm1 2016-07-13 12:49:17 2.0 13.0 NaN An abandoned WWII bunker in France | by Anita ...
5215 5215 bbuks 44880 False False True True bilde2910 118898 bilde2910 4smpeb 2016-07-13 14:39:10 1.0 5.0 NaN xkcd on Pokémon Go
5216 5216 bbuks 44880 False False True True bilde2910 118898 bilde2910 4s575r 2016-07-10 14:52:25 0.0 2.0 It kind of looks like a toad, and won't respon... Can someone help me catch this pokémon?
5217 5217 bbuks 44880 False False True True bilde2910 118898 bilde2910 4s2dg9 2016-07-10 00:31:07 0.0 5.0 Availo EsperNet states in their MotD the follo... Availo EsperNet's really old theme songs
5218 5218 bbuks 44880 False False True True bilde2910 118898 bilde2910 4repbj 2016-07-05 23:25:33 12.0 65.0 NaN In the world of Windows, cancelling a file tra...
5219 5219 bbuks 44880 False False True True bilde2910 118898 bilde2910 4pgrrr 2016-06-23 16:37:45 0.0 2.0 NaN Groupees currently has an offer on trailer mus...
5220 5220 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ocaeg 2016-06-16 11:12:11 8.0 4.0 NaN The YAMPST statistics tracking system for modp...
5221 5221 bbuks 44880 False False True True bilde2910 118898 bilde2910 4o1d1x 2016-06-14 16:23:34 9.0 2.0 NaN Everyone of your profession decides to do the ...
5222 5222 bbuks 44880 False False True True bilde2910 118898 bilde2910 4nh8hs 2016-06-10 19:52:47 7.0 49.0 NaN Comic Sans is the best CAPTCHA
5223 5223 bbuks 44880 False False True True bilde2910 118898 bilde2910 4n58tp 2016-06-08 17:19:44 9.0 71.0 NaN How to let everyone know that your site is vul...
5224 5224 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lhnyr 2016-05-29 00:36:48 211.0 657.0 Hi. I know that this post will be quite a wall... I'm worried about current trends in the Minecr...
5225 5225 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lhov5 2016-05-29 00:43:10 93.0 159.0 NaN I'm worried about current trends in the Minecr...
5226 5226 bbuks 44880 False False True True bilde2910 118898 bilde2910 4lbt77 2016-05-27 20:24:28 1.0 0.0 NaN "Reward: 2 Emergencies"
5227 5227 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kzde5 2016-05-25 15:25:03 560.0 1803.0 h3h3productions [has been sued](https://www.yo... h3h3productions sued for copyright infringemen...
5228 5228 bbuks 44880 False False True True bilde2910 118898 bilde2910 4l1ycv 2016-05-26 00:23:28 0.0 0.0 NaN I experimented with timelapses a while ago. I ...
5229 5229 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kmwcs 2016-05-23 10:50:04 87.0 1148.0 NaN I'm just casually browsing the web using Inter...
5230 5230 bbuks 44880 False False True True bilde2910 118898 bilde2910 4kd0na 2016-05-21 13:19:48 1.0 7.0 NaN A Minecraft server instance launched on Twitch...
5231 5231 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ji5rm 2016-05-16 00:47:32 0.0 5.0 NaN Trailer driving through frozen wastelands
5232 5232 bbuks 44880 False False True True bilde2910 118898 bilde2910 4jhmw1 2016-05-15 22:46:36 0.0 1.0 [removed] LPT: If there are a handful of special charact...
5233 5233 bbuks 44880 False False True True bilde2910 118898 bilde2910 4jb2zh 2016-05-14 14:38:08 1.0 1.0 [removed] This is not an AMA.
5234 5234 bbuks 44880 False False True True bilde2910 118898 bilde2910 4j2ee4 2016-05-12 22:29:40 1.0 21.0 NaN The Norwegian Polar Institute has a database o...
5235 5235 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ibtml 2016-05-08 00:29:22 40.0 120.0 [removed] I made a thing that lets you share Steam key g...
5236 5236 bbuks 44880 False False True True bilde2910 118898 bilde2910 4iaxow 2016-05-07 20:44:52 7.0 89.0 NaN [Groupees] The Fantasy Bundle #2 | Mystery of ...

5237 rows × 16 columns

In [3]:
fdf["Total_score"]= fdf.groupby("name").transform(np.sum).score
fdf["Total_num_comments"]= fdf.groupby("name").transform(np.sum).num_comments
In [9]:
fdf.dtypes
Out[9]:
Unnamed: 0              int64
ID_x                   object
comment_karma           int64
is employee              bool
is_friend                bool
is_gold                  bool
is_mod                   bool
name                   object
post_karma              int64
Author                 object
ID_y                   object
Submission_Created     object
num_comments          float64
score                 float64
selftext               object
title                  object
Total_score           float64
Total_num_comments    float64
dtype: object
In [4]:
test = fdf.sample(3000)
In [42]:
test
Out[42]:
Unnamed: 0 ID_x comment_karma is employee is_friend is_gold is_mod name post_karma Author ID_y Submission_Created num_comments score selftext title Total_score Total_num_comments
4369 4369 702ov 16072 False False True False headoverheals 2058 headoverheals 79526i 2017-10-27 21:26:11 5.0 10.0 NaN The Forum Pollâ„¢ - PC lead in Ontario Solid 1766.0 1466.0
3006 3006 81bjr 20705 False False True True MaceWumpus 470 MaceWumpus 181cpw 2013-02-07 04:41:24 5.0 2.0 Obviously, if you want to study Kant, you shou... Languages for graduate level philosophical study? 2198.0 1305.0
5020 5020 fxvfr 324627 False False True True ALLSTARTRIPOD 1021 ALLSTARTRIPOD 4oda4c 2016-06-16 16:22:08 713.0 9859.0 I am the commanding officer for a small band o... TIFU by accidentally sending one of my soldier... 11313.0 889.0
3036 3036 ep4r2 31729 False False True False Egon_1 140087 Egon_1 7kkl20 2017-12-18 14:11:42 168.0 224.0 NaN A Devastating ING Report about Bitcoin Core: "... 12640.0 3677.0
4364 4364 9xgkiz3 28164 False False True True Woofers_MacBarkFloof 5719 Woofers_MacBarkFloof 6zz5hv 2017-09-14 04:33:27 1.0 9.0 NaN Gimme that yarba 6181.0 1067.0
3097 3097 ep4r2 31729 False False True False Egon_1 140087 Egon_1 7hsuyb 2017-12-06 00:14:30 11.0 113.0 NaN Vinny Lingham: "Mission Accomplished: I finall... 12640.0 3677.0
5069 5069 b4m9h 64646 False False True False Timbitkiller 4099 Timbitkiller 4nnxe2 2016-06-12 02:59:33 4.0 1.0 NaN Eunicid Worm? This scary duder steals food fro... 3415.0 847.0
2655 2655 4x68b 526856 False False True False zazzlekdazzle 38177 zazzlekdazzle 5ugv2q 2017-02-16 21:31:09 523.0 14992.0 NaN Kiss cam mistake at the Pro Bowl 91952.0 4140.0
1898 1898 4hukh 82632 False False True False MarbledNightmare 52068 MarbledNightmare 6yz1xo 2017-09-09 04:33:25 10.0 47.0 NaN The Women of Tiger Muay Thai 18049.0 5365.0
3927 3927 17ed7y 106667 False False True False dust_wind 16930 dust_wind 7a46w6 2017-11-01 18:12:58 8.0 0.0 NaN When a right wing nut drove into a group of le... 61913.0 4599.0
832 832 6hzff 41181 False False True True flakingnapstich 14232 flakingnapstich 7gpbil 2017-11-30 23:26:20 0.0 1.0 NaN Man With Missing Wife Pleads Guilty to Having ... 2073.0 169.0
2350 2350 6zykq 20812 False False True False DrJWilson 2811 DrJWilson tpaa8 2012-05-16 05:16:09 17.0 18.0 I'm personally very new to finance. I already ... I just got my first real long-term job. What d... 4003.0 1317.0
4363 4363 9xgkiz3 28164 False False True True Woofers_MacBarkFloof 5719 Woofers_MacBarkFloof 6zxcob 2017-09-13 23:33:09 50.0 29.0 NaN GeOPOliTICal AnALyST 6181.0 1067.0
159 159 60siq 33855 False False True False attorneyriffic 903 attorneyriffic 2xumxu 2015-03-04 03:30:13 19.0 18.0 NaN What's Wrong with my Plasma? 10360.0 5612.0
3495 3495 3ekml 788878 False False True True captainmagictrousers 12934 captainmagictrousers 6m7o5k 2017-07-09 17:20:46 0.0 12.0 Girl, you're the one that I love\n\nYou're the... Girl, You're My Girl, Girl 22127.0 1061.0
2389 2389 rpo6p 3980 False False True False JonathanJK 1214 JonathanJK 6ia1os 2017-06-20 01:53:34 2.0 0.0 I have Videorama (imo best video editor on IOS... Travelling for a month on a train (Germany to ... 1555.0 437.0
4541 4541 9tcikor 1703 False False True False DarkSpace-Harbinger 3458 DarkSpace-Harbinger 7l1xav 2017-12-20 18:06:59 1.0 2.0 NaN Today on r/Bitcoin 3930.0 3021.0
2698 2698 bcynk 3385 False False True False -Travis 6124 -Travis 251p4i 2014-05-08 18:19:33 2.0 1.0 NaN Someone in my office is really serious about p... 7639.0 616.0
4170 4170 13sogx 525 False False True False BulTV 1936 BulTV 7l9g4w 2017-12-21 16:28:39 8.0 35.0 Hej,\ntoday is my last day in front of the com... My Christmas Present for you - Free PUBG Strea... 1878.0 808.0
1853 1853 4hukh 82632 False False True False MarbledNightmare 52068 MarbledNightmare 7esj7d 2017-11-22 20:15:40 135.0 770.0 NaN Per his mgmt, Mark Hunt was flown to Vegas yes... 18049.0 5365.0
946 946 6fm0w 165640 False False True True DoctorWhosOnFirst 42433 DoctorWhosOnFirst 7dhyiy 2017-11-17 06:21:53 3.0 7.0 NaN The Science of the Post: Going Deep with "Mills" 2696.0 4881.0
3125 3125 gmtl5 7069 False False True False Unbelievabob 258 Unbelievabob 5arxxu 2016-11-02 22:22:30 537.0 75.0 So, I've decided to finally get round to cashi... [Free] Goodbye /r/GlobalOffensiveTrade, 30 key... 363.0 968.0
4913 4913 92cdm 110183 False False True False sgthombre 23894 sgthombre 2vc2k5 2015-02-09 23:11:20 0.0 2.0 NaN I ruined it 4411.0 1361.0
910 910 6fm0w 165640 False False True True DoctorWhosOnFirst 42433 DoctorWhosOnFirst 7gocly 2017-11-30 21:18:55 12.0 6.0 Read anything good lately? Interesting Articles Thread 2696.0 4881.0
2537 2537 j0r12 17672 False False True False Ihavesomethingtosay9 385 Ihavesomethingtosay9 2y7l5x 2015-03-07 07:10:00 41.0 44.0 When: 2-3 years ago\n\nWhat: we got into an ar... Me [24 F] and my brother's girl friend [21 F] ... 1047.0 647.0
452 452 e1xb8 127100 False False True True Max_Trollbot_ 6304 Max_Trollbot_ 662bzv 2017-04-18 14:50:26 0.0 7.0 NaN Truck Stop Tamales 25287.0 1500.0
2887 2887 nmnn 543 False False True False Bastiat 5145 Bastiat vti1r 2012-06-30 02:14:36 49.0 24.0 NaN Walmart Canada will lower prices on 10,000 pro... 6279.0 2117.0
2513 2513 j0r12 17672 False False True False Ihavesomethingtosay9 385 Ihavesomethingtosay9 4z3nl5 2016-08-23 03:13:03 4.0 38.0 I think it's funny how people think I care tha... Collections Notice 1047.0 647.0
3619 3619 b6g8f 4783 False False True True lun471k 9454 lun471k 4hkaka 2016-05-03 01:53:25 40.0 76.0 NaN My new 2016 Carbon Gray Kawasaki ZX-10R ABS. M... 32778.0 2741.0
1513 1513 3sch0 200909 False False True True bakonydraco 22200 bakonydraco 70qbyt 2017-09-18 00:16:35 85.0 59.0 ## [Week 4](https://imgur.com/3IZQz1P.png)\n\n... AP Poll Voter Consistency Week 4 12123.0 6288.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2727 2727 3xro2 27341 False False True True GeneralissimoFranco 6437 GeneralissimoFranco 794mph 2017-10-27 20:20:01 10.0 4.0 NaN Conan, what is best in life? 3997.0 1018.0
5146 5146 bbuks 44880 False False True True bilde2910 118898 bilde2910 7j5b5n 2017-12-12 00:00:30 272.0 3993.0 NaN me📅irl 326014.0 10923.0
2395 2395 rpo6p 3980 False False True False JonathanJK 1214 JonathanJK 5kwvmf 2016-12-29 16:48:52 2.0 3.0 https://youtu.be/pN6RHWq27QI - wiki article - ... Before there were swimming pools in Hong Kong.... 1555.0 437.0
2133 2133 h1zyl 131073 False False True False thecricketnerd 2556 thecricketnerd 37i8f2 2015-05-27 23:46:44 0.0 7.0 NaN Chris Kattan was on Horatio's podcast today. S... 1974.0 1224.0
3393 3393 368g9 23756 False False True True strained_brain 3856 strained_brain 6lljit 2017-07-06 15:52:33 0.0 16.0 I propose, in honor of the NYC Chief of Police... Here's a thought... 4700.0 588.0
932 932 6fm0w 165640 False False True True DoctorWhosOnFirst 42433 DoctorWhosOnFirst 7ebnzp 2017-11-20 23:00:10 19.0 17.0 NaN Nick Saban provides updates on Alabama's injur... 2696.0 4881.0
2571 2571 602o7 22276 False False True True JustSomeBadAdvice 1072 JustSomeBadAdvice 1c5j46 2013-04-11 22:09:17 85.0 79.0 This situation is not the end of the world. I... You people are seriously not thinking clearly. 1343.0 819.0
4038 4038 12cfrx 132989 False False True True KVMechelen 16221 KVMechelen 6vjmlw 2017-08-23 18:16:42 69.0 2270.0 Crossposting this from r/soccer, slightly chan... So I decided to analyze every single corner go... 15752.0 3918.0
4261 4261 13sogx 525 False False True False BulTV 1936 BulTV 5ye67z 2017-03-09 12:37:41 3.0 6.0 **ABOUT:**\nOn my Website you can download Pan... Free Overlays for Streamers! Overlays, Panels ... 1878.0 808.0
2540 2540 j0r12 17672 False False True False Ihavesomethingtosay9 385 Ihavesomethingtosay9 2taqxm 2015-01-22 19:02:40 12.0 10.0 I have no idea how to look up how to become a ... I want to become a business teacher for high s... 1047.0 647.0
4529 4529 37c2y 28052 False False True False amheekin 8899 amheekin wvrh4 2012-07-20 20:12:23 10.0 20.0 NaN I'm fairly sure that this guy is played by Ton... 21276.0 3158.0
4515 4515 37c2y 28052 False False True False amheekin 8899 amheekin 2f9lof 2014-09-02 18:37:27 17.0 10.0 So this phrase has always blown my mind. I und... Some questions about "tu me manques" 21276.0 3158.0
4906 4906 92cdm 110183 False False True False sgthombre 23894 sgthombre 2xvqhl 2015-03-04 09:12:39 45.0 52.0 NaN ‘Wonder Woman’ Gal Gadot Gives Update on ‘Batm... 4411.0 1361.0
2150 2150 ng6st76 334 False False True False mcpat21 653 mcpat21 7l2mf6 2017-12-20 19:53:08 2.0 3.0 NaN What is a general misconception that is believ... 749.0 855.0
180 180 60siq 33855 False False True False attorneyriffic 903 attorneyriffic 2378eo 2014-04-16 21:23:13 0.0 0.0 [removed] What have I been missing? 10360.0 5612.0
4982 4982 dk9up 1391 False False True False mthilliard 128 mthilliard 75ijo7 2017-10-10 20:17:15 0.0 2.0 Never click on the bittrex ads. \nToday's link... New bittrex phishing attempt daily 173.0 243.0
2868 2868 nmnn 543 False False True False Bastiat 5145 Bastiat 7fj9u9 2017-11-26 03:32:03 6.0 40.0 NaN Crypto investor Peter Thiel: People are undere... 6279.0 2117.0
1697 1697 11lfrn 3172 False False True False sodapop66 1058 sodapop66 5qi0nf 2017-01-27 19:24:29 0.0 3.0 NaN Talsur - The Ravensong (if Tom Waits was in a ... 1366.0 416.0
4676 4676 dydyx 11569 False False True False _UpstateNYer_ 6820 _UpstateNYer_ 4l0r2e 2016-05-25 20:17:29 6.0 8.0 NaN When you're sharing externally with SharePoint... 14928.0 865.0
4370 4370 702ov 16072 False False True False headoverheals 2058 headoverheals 78f6vt 2017-10-24 14:48:15 17.0 0.0 NaN Toronto Sun: Taxpayers put up $300,000 to reno... 1766.0 1466.0
3290 3290 d73zv 20183 False False True True GeekyMeerkat 1456 GeekyMeerkat 6naz8p 2017-07-14 21:39:05 10.0 4.0 So this question isn't the classic question of... Warlock Pact of the Blade Question 1512.0 768.0
125 125 60siq 33855 False False True False attorneyriffic 903 attorneyriffic 5rirv4 2017-02-02 01:35:10 1.0 0.0 NaN State Question 780 sentences Cleveland County ... 10360.0 5612.0
1398 1398 daoor 29081 False False True True CineFunk 5042 CineFunk 6xxh4o 2017-09-04 05:15:42 1.0 3.0 NaN 2017 Chick-Fil-A Kick Off Classic: #3 Florida ... 2185.0 785.0
3142 3142 gmtl5 7069 False False True False Unbelievabob 258 Unbelievabob 4h8exa 2016-05-01 12:18:29 0.0 1.0 ***\n\n**Item:** Souvenir M4A1-S | Master Piec... [H] Souvenir Master Piece FT 0.23 [W] Keys 363.0 968.0
85 85 4amx5 11338 False False True False jorshrod 9785 jorshrod 6a16l6 2017-05-09 00:45:14 5.0 17.0 I have found some great savings in the last ye... Some hobby items can be cheaper directly from ... 9966.0 2503.0
546 546 e1xb8 127100 False False True True Max_Trollbot_ 6304 Max_Trollbot_ 26yxfu 2014-05-31 20:44:34 3.0 35.0 NaN Why didn't Back to the Future 2 come out befor... 25287.0 1500.0
4710 4710 dydyx 11569 False False True False _UpstateNYer_ 6820 _UpstateNYer_ 3oiogr 2015-10-13 02:51:32 12.0 8.0 NaN Does anyone know what the orange stuff is on t... 14928.0 865.0
3594 3594 b6g8f 4783 False False True True lun471k 9454 lun471k 6sm2uc 2017-08-09 18:07:56 0.0 7.0 It was getting out of hand (haha). \n\nSeriou... Taking back my life's ownership. 32778.0 2741.0
4806 4806 5g3kza2 27427 False False True False HardlightCereal 1409 HardlightCereal 6sjzbv 2017-08-09 10:48:07 3.0 35.0 NaN Mad libs time: Whoa _________, bambalam! 1904.0 501.0
3613 3613 b6g8f 4783 False False True True lun471k 9454 lun471k 4p0xed 2016-06-20 23:52:00 2.0 4.0 Anyone knows anything about insurance coverage... Wind knocked my bike over this afternoon. 32778.0 2741.0

3000 rows × 18 columns

In [6]:
str(test.Submission_Created)
test.dtypes
Out[6]:
Unnamed: 0              int64
ID_x                   object
comment_karma           int64
is employee              bool
is_friend                bool
is_gold                  bool
is_mod                   bool
name                   object
post_karma              int64
Author                 object
ID_y                   object
Submission_Created     object
num_comments          float64
score                 float64
selftext               object
title                  object
Total_score           float64
Total_num_comments    float64
dtype: object
In [58]:
Chart(fdf).mark_bar().encode(
    x=X('Author:O', sort=SortField(field='Total_num_comments', order='descending', op='max'),
        axis=Axis(title='Project')),
    y=Y('Total_num_comments:Q',
        axis=Axis(title='Total Score')),
    color ="is_mod"
        )
#we got our top 10 Authers from here.
In [47]:
Chart(fdf.nlargest(440,"Total_score")).mark_bar().encode(
    x=X('Author:O', sort=SortField(field='Total_score', order='descending', op='max'),
        axis=Axis(title='Author')),
    y=Y('Total_score:Q',
        axis=Axis(title='Total Score')),
    color ="is_mod"
        )
#Being a moderator or not doesn't make a diffirence in total score.
#Number in highest total score is a moderator named (bilde2910)
In [48]:
Chart(fdf.nlargest(480,"Total_num_comments")).mark_bar().encode(
    x=X('Author:O', sort=SortField(field='Total_num_comments', order='descending', op='max'),
        axis=Axis(title='Author')),
    y=Y('Total_num_comments:Q',
        axis=Axis(title='Total comments')),
    color ="is_mod"
        )
#We can see that the user (bilde2910) has the highest number of comments as well as the previous chart which he got the highets
#total score, which means that he is the most active user in our sample.
#we can also conclude that moderators got more comments than non-moderators.
In [18]:
fdf = final_df.sample(3000)
In [4]:
final_df=pd.read_csv('final_df.csv')
In [57]:
alt.Chart(fdf).mark_line().encode(x='Total_score',y='Total_num_comments')
#number of comments and score goes side to side with Total score reaches 80k.
In [31]:
alt.Chart(fdf).mark_line().encode(
    y='mean(Total_score)',
    x=alt.X('Submission_Created:T', timeUnit='year'),
)
#since 2013 the total score has been increasing.
#further invistigation to see why it droppd on 2010.
In [19]:
fdf["Total_score"]= fdf.groupby("name").transform(np.sum).score
fdf["Total_num_comments"]= fdf.groupby("name").transform(np.sum).num_comments
In [38]:
alt.Chart(fdf).mark_line().encode(
    y='mean(Total_num_comments)',
    x=alt.X('Submission_Created:T', timeUnit='year'),
    color='is_mod',
)
In [39]:
alt.Chart(fdf).mark_line().encode(
    y='mean(Total_score)',
    x=alt.X('Submission_Created:T', timeUnit='year'),
    color='is_mod',
)
#In 2013 Reddit introduces two kinds of moderators: those with full permissions, and those with limited permissions. 
#So the number of moderators increased since then and the website got more active.
In [55]:
fdf.head()
Out[55]:
Unnamed: 0 ID_x comment_karma is employee is_friend is_gold is_mod name post_karma Author ID_y Submission_Created num_comments score selftext title Total_score Total_num_comments
1209 1209 dalvl 88257 False False True False Rekdon 55129 Rekdon 76cjjr 2017-10-14 17:55:00 8.0 2.0 NaN What is your "I need my coffee in the morning"... 8580.0 515.0
2673 2673 4x68b 526856 False False True False zazzlekdazzle 38177 zazzlekdazzle 57bd5k 2016-10-13 20:06:41 275.0 629.0 NaN How Video Games Are Changing the Way Soccer is... 88603.0 3602.0
2274 2274 6zykq 20812 False False True False DrJWilson 2811 DrJWilson 7c9caa 2017-11-11 19:32:57 3.0 19.0 NaN Writing about Place: The Bustling City Setting... 2186.0 716.0
4731 4731 j8ikd 63272 False False True False ScaryKerry91476 23141 ScaryKerry91476 6z4wir 2017-09-10 02:11:31 201.0 1634.0 So. Yesterday was supposed to be the day. We w... A Mommy Fearest update - Is it possible that a... 24554.0 2548.0
5231 5231 bbuks 44880 False False True True bilde2910 118898 bilde2910 4ji5rm 2016-05-16 00:47:32 0.0 5.0 NaN Trailer driving through frozen wastelands 224419.0 7012.0